# -*-Python-*-

# Simplification of relative attention which just adds a learned bias value
# which depends on the relative position and the attention head. Biases are
# shared across layers.

import mesh_tensorflow.transformer.transformer
import mesh_tensorflow.transformer.transformer_layers

# Bind the `relative_attention_type` parameter of SelfAttention to
# "bias_shared", selecting the variant described at the top of this file.
transformer_layers.SelfAttention.relative_attention_type = "bias_shared"
